package com.liuqi.openai.data.document.splitter;

import com.liuqi.openai.data.document.DocumentSplitter;

import java.util.regex.Pattern;

/**
 * Splits text into paragraphs.
 *
 * <p>A paragraph boundary is a run of whitespace that contains at least two line breaks
 * (for example {@code "\n\n"}). Any extra whitespace before, after, or between the line
 * breaks is consumed as part of the separator. The following are all valid paragraph
 * separators: {@code "\n\n"}, {@code "\n\n\n"}, {@code "\n \n"}, {@code " \n \n "}.
 *
 * <p>A Windows line ending ({@code "\r\n"}) counts as a single line break, so one
 * {@code "\r\n"} on its own is not a paragraph boundary, while {@code "\r\n\r\n"} is.
 *
 * @author liuqi
 * @date 2025/8/11
 **/
public class ParagraphDocumentSplitter extends HybridDocumentSplitter {

    /**
     * Paragraph separator: optional whitespace, a line break, optional whitespace,
     * a second line break, optional whitespace.
     *
     * <p>Each {@code \R} is wrapped in an atomic group. Since JDK 9 a bare {@code \R} may
     * backtrack and match only the {@code \r} of a {@code \r\n} pair, which would let the
     * second {@code \R} match the remaining {@code \n} and turn every single Windows line
     * ending into a paragraph boundary. The atomic group forces {@code \r\n} to be consumed
     * as one line break.
     *
     * <p>Compiled once here instead of on every {@link #split(String)} call.
     */
    private static final Pattern PARAGRAPH_SEPARATOR =
            Pattern.compile("\\s*(?>\\R)\\s*(?>\\R)\\s*");

    /**
     * Creates a paragraph splitter without a sub-splitter; {@code null} is passed on
     * as the sub-splitter.
     */
    public ParagraphDocumentSplitter() {
        this(null);
    }

    /**
     * Creates a paragraph splitter with the given sub-splitter.
     *
     * @param subSplitter forwarded unchanged to the {@link HybridDocumentSplitter}
     *                    constructor; may be {@code null} (the no-arg constructor passes
     *                    {@code null}). NOTE(review): presumably used by the superclass to
     *                    further split paragraphs that are too large - confirm against
     *                    {@code HybridDocumentSplitter}.
     */
    public ParagraphDocumentSplitter(DocumentSplitter subSplitter) {
        super(subSplitter);
    }

    /**
     * Splits the given text at paragraph boundaries.
     *
     * <p>Follows {@link Pattern#split(CharSequence)} semantics: separators are removed
     * from the result, trailing empty strings are dropped, and text that contains no
     * separator is returned as a single-element array.
     *
     * @param text the text to split; must not be {@code null}
     * @return the paragraphs in their original order
     */
    @Override
    protected String[] split(String text) {
        return PARAGRAPH_SEPARATOR.split(text);
    }

}
